/*******************************************************************************
*	Last Update: 19 July, 2023													
*																				
* 	This STATA code reproduces the main tables (Tables 1 to 10) of the paper. 		
* 						
*	As a general rule, the content of every variable in the datasets has been replaced 
*   with random numbers, so the output of this code will not match the figures reported 
*   in the paper. Specifically, continuous variables have been replaced with draws from 
*   a uniform distribution between zero and one, and dichotomous variables with a 
*   randomized indicator. For all datasets, the sample size is consistent with the 
*   sample size used in the paper.
				
********************************************************************************/

* Interpret every command below under Stata 14 syntax.
version 14

* Start from an empty session and never pause the output for --more--.
clear all
set more off

* Graph scheme and plain-text log format used for this session.
set scheme s1color
set logtype text

* The tables below call the user-written commands reghdfe and qreg2.
* Stop right away with an installation hint if either one is missing, instead
* of failing halfway through the run. (Recent reghdfe releases also need the
* ftools package.)
foreach cmd in reghdfe qreg2 {
	capture which `cmd'
	if _rc {
		display as error "command `cmd' not found; install it first, e.g.: ssc install `cmd'"
		exit 199
	}
}

* All data paths below are relative ("./data/..."), so point this at the
* folder that contains the data directory before running.
cd "" // YOUR WORKING DIRECTORY

********************************************************************************
** Table 1: Summary statistics
********************************************************************************
* One tabstat call per variable, in the order the rows are listed here.
* totalassets and firmage are printed without decimals, everything else with
* two decimals.
* NOTE: the local below is only visible within the same execution, so run this
* section as a whole.
local sumstats "n mean sd p25 p50 p75"

* Panel A: All firms
use "./data/allfirmsample.dta", clear
keep if samplerestriction == 1

foreach v of varlist totalassets firmage {
	tabstat `v' if allfirmsample == 1, stat(`sumstats') column(statistics) format(%9.0f)
}
foreach v of varlist cashflow profitability listingstatus investment leverage {
	tabstat `v' if allfirmsample == 1, stat(`sumstats') column(statistics) format(%9.2f)
}
* TFP is summarized on its own sample flag rather than on allfirmsample.
tabstat tfp if tfpsample == 1, stat(`sumstats') column(statistics) format(%9.2f)

* Panel B: Standalone firms
use "./data/baselinesample.dta", clear
keep if baselinesample == 1 & samplerestriction == 1

foreach v of varlist totalassets firmage {
	tabstat `v', stat(`sumstats') column(statistics) format(%9.0f)
}
foreach v of varlist cashflow profitability listingstatus investment leverage tfp {
	tabstat `v', stat(`sumstats') column(statistics) format(%9.2f)
}

********************************************************************************
** Table 2: Productive efficiency and investment around GQ upgrades
********************************************************************************
* Three regressions of an outcome on postgq with firm, state-by-year and
* industry-by-year fixed effects, standard errors clustered by cityid.
* Each outcome is estimated on its own sample flag.
use "./data/allfirmsample.dta", clear

keep if samplerestriction == 1

* column 1: TFP
* NOTE(review): dof(cl) is specified for this column only -- confirm that the
* difference from columns 2 and 3 is intended.
reghdfe tfp postgq if tfpsample == 1, absorb(firmid nic2digit#fyear stateid#fyear) vce (clus cityid) dof(cl)

* column 2: days of inventory
reghdfe daysofinventory postgq if inventorysample == 1, absorb(firmid stateid#fyear nic2digit#fyear) clus(cityid)

* column 3: investment
reghdfe investment postgq if allfirmsample == 1, absorb(firmid stateid#fyear nic2digit#fyear) clus(cityid)

********************************************************************************
* Table 3: Business Group Prevalence and Standalone Investment 
********************************************************************************
* Investment regressed on postgq and its interaction with highbgs
* (postgq_highbgs), restricted to the baseline sample; standard errors
* clustered by cityid.
use "./data/baselinesample.dta", clear

keep if baselinesample == 1 & samplerestriction == 1

** column 1: firm and state-by-year fixed effects, the latter also interacted
** with highbgs
reghdfe investment postgq postgq_highbgs, absorb(firmid stateid#fyear highbgs#stateid#fyear) vce (clus cityid)

** column 2: adds industry-by-year fixed effects, also interacted with highbgs
reghdfe investment postgq postgq_highbgs, absorb(firmid nic2digit#fyear highbgs#nic2digit#fyear stateid#fyear highbgs#stateid#fyear) vce (clus cityid)

********************************************************************************
** Table 4: PreGQ regional characteristics
********************************************************************************
use "./data/baselinesample.dta", clear

keep if pregqchar == 1 

************************
* Firm characteristics *
************************
* For every characteristic: print mean and median by highbgs, then test the
* difference in means (OLS with robust standard errors) and the difference in
* medians (median regression via qreg2). Both tests are displayed.
foreach var of varlist logtotalassets firmage cashflow profitability investment leverage tfp {

	* Difference in means
	tabstat `var',  by(highbgs) stat(mean p50) format(%9.2f)
	reg `var' highbgs, robust 
	test highbgs=0

	* Difference in medians
	qreg2 `var' highbgs, q(0.5) 
	test highbgs=0
}

* Days of inventory: same comparison, restricted to values between 5 and 150.
* NOTE: the local below is only visible within the same execution, so run it
* together with the loop.
local invrange "inrange(daysofinventory,5,150)"

foreach var of varlist daysofinventory {

	* Difference in means
	tabstat `var' 		if `invrange', by(highbgs) stat(mean p50) save
	reg `var' highbgs 	if `invrange', robust 
	test highbgs=0

	* Difference in medians
	qreg2 `var' highbgs if `invrange', q(0.5) 
	test highbgs=0
}

*******************************************************
* Physical infrastructure and Labor market conditions *
*******************************************************
* For every measure: print mean and median by highbgs, then test the
* difference in means (regression without absorbed effects) and the
* difference in medians (qreg2), both with standard errors clustered by cityid.
use "./data/pregqregionalcharacteristics_labor_infra.dta", clear

foreach var of varlist rating_roadways obstacle_transport labor_constraint obstacle_labor fill_manager {

	* Difference in means
	tabstat `var' , by(highbgs) stat(mean p50) format(%9.2f)
	reghdfe `var' highbgs, noabsorb clus(cityid)
	test highbgs=0

	* Difference in medians
	qreg2 `var' highbgs, q(0.5) clus(cityid)
	test highbgs=0

}

*************************
* Financial development *
*************************
* Bank branches per capita: mean and median by highbgs, then tests of the
* difference in means and in medians, clustered by cityid.
use "./data/pregqregionalcharacteristics_findevelopment.dta", clear

foreach var of varlist bankbranchespercapita {

	* Difference in means
	tabstat `var' , by(highbgs) stat(mean p50) save
	reghdfe `var' highbgs, noabsorb clus(cityid)
	test highbgs=0

	* Difference in medians
	qreg2 `var' highbgs, q(0.5) clus(cityid)
	test highbgs=0
}

****************************************
* Listed share by business group share *
****************************************
* After each regression three numbers are displayed: the intercept, the
* intercept plus the highbgs coefficient, and the highbgs coefficient itself.
* Presumably these are the level for highbgs == 0, the level for highbgs == 1
* and their difference -- this assumes highbgs is a 0/1 indicator; TODO confirm.
use "./data/pregqregionalcharacteristics_findevelopment_listedshare.dta", clear

* Means (OLS with robust standard errors)
reg share_listed highbgs, robust
di %6.2fc _b[_cons]
di %6.2fc _b[highbgs] + _b[_cons]
di %6.3fc _b[highbgs]

* Medians (median regression)
qreg2 share_listed highbgs, q(0.5) 
di %6.2fc _b[_cons]
di %6.2fc _b[highbgs] + _b[_cons]
di %6.3fc _b[highbgs]

********************************************************************************
* Table 5: Horse race regressions
********************************************************************************
* Every column regresses investment on postgq, postgq_highbgs and, where
* indicated, additional postgq interactions. All columns share the same fixed
* effects and cluster standard errors by cityid.
*
* Both panels are estimated on the same restricted sample loaded below; they
* differ only in the additional regressors (Panel B uses the variables with the
* _sa suffix). NOTE(review): the panel titles presumably describe the set of
* firms over which those regressors were constructed -- confirm against the
* data documentation.
*
* NOTE: the local below is only visible within the same execution, so run this
* section as a whole.
use "./data/baselinesample.dta", clear

keep if baselinesample == 1 & samplerestriction == 1

* Fixed effects common to all ten regressions of this table.
local fe "firmid nic2digit#fyear highbgs#nic2digit#fyear stateid#fyear highbgs#stateid#fyear"

**********************
* Panel A: All firms *
**********************
* Baseline
reghdfe investment postgq postgq_highbgs, absorb(`fe') vce (clus cityid)

* Baseline + Fraction of listed firms
reghdfe investment postgq postgq_highbgs postgq_high_share_listed, absorb(`fe') vce (clus cityid)

* Baseline + average age
reghdfe investment postgq postgq_highbgs postgq_high_firmage, absorb(`fe') vce (clus cityid)

* Baseline + average TFP (only manufacturing)
reghdfe investment postgq postgq_highbgs postgq_high_tfp if manufacturing == 1, absorb(`fe') vce (clus cityid)

* All measures in one (only manufacturing)
reghdfe investment postgq postgq_highbgs postgq_high_share_listed postgq_high_firmage postgq_high_firmsize postgq_high_tfp if manufacturing == 1, absorb(`fe') vce (clus cityid)


*****************************
* Panel B: Standalone firms *
*****************************
* Baseline
reghdfe investment postgq postgq_highbgs, absorb(`fe') vce (clus cityid)

* Baseline + Fraction of listed firms
reghdfe investment postgq postgq_highbgs postgq_high_share_listed_sa, absorb(`fe') vce (clus cityid)

* Baseline + average age
reghdfe investment postgq postgq_highbgs postgq_high_firmage_sa, absorb(`fe') vce (clus cityid)

* Baseline + average TFP (only manufacturing)
reghdfe investment postgq postgq_highbgs postgq_high_tfp_sa if manufacturing == 1, absorb(`fe') vce (clus cityid)

* All measures in one (only manufacturing)
reghdfe investment postgq postgq_highbgs postgq_high_share_listed_sa postgq_high_firmage_sa postgq_high_firmsize_sa postgq_high_tfp_sa if manufacturing == 1, absorb(`fe') vce (clus cityid)

********************************************************************************
* Table 6: Investment by firm type 
********************************************************************************
* The same specification estimated on three samples. Only samplerestriction is
* applied when loading; the firm-type restriction is imposed per column.
* NOTE: the local below is only visible within the same execution, so run this
* section as a whole.
use "./data/baselinesample.dta", clear

keep if samplerestriction == 1

* Fixed effects common to the three columns.
local fe "firmid nic2digit#fyear highbgs#nic2digit#fyear stateid#fyear highbgs#stateid#fyear"

* column 1: All firms
reghdfe investment postgq postgq_highbgs , absorb(`fe') vce (clus cityid)

* column 2: baseline, standalone firms 
reghdfe investment postgq postgq_highbgs if baselinesample == 1, absorb(`fe') vce (clus cityid)

* column 3: group firms
reghdfe investment postgq postgq_highbgs if businessgroup == 1, absorb(`fe') vce (clus cityid)

********************************************************************************
** Table 7: Aggregate bank lending around GQ upgrades: District-level evidence
********************************************************************************
** Data at the district-fyear level (from the RBI)
* All columns include district fixed effects and cluster by districtid.
use "./data/macrolendingsample.dta", clear

* column 1: postgq with year fixed effects
reghdfe logcredit postgq, absorb(districtid fyear) clus(districtid)

* column 2: postgq with state-by-year fixed effects
reghdfe logcredit postgq, absorb(districtid stateid#fyear) clus(districtid)

* column 3: adds the highbgs interaction, year fixed effects
reghdfe logcredit postgq postgq_highbgs, absorb(districtid fyear) clus(districtid)

* column 4: adds the highbgs interaction, state-by-year fixed effects
reghdfe logcredit postgq postgq_highbgs, absorb(districtid stateid#fyear) clus(districtid)

********************************************************************************
** Table 8: Loan-level regressions: Bank lending to standalone firms
********************************************************************************
** Collapsed data into bank-firm-event-time level
use "./data/bankloansample.dta", clear

* Count the total number of loans used in the estimations
* Step 1: run the weighted specification once, storing the estimated firm fixed
* effect in the variable fe.
reghdfe change_logloanamount std_groupexposure [aweight = pregq_totalassets], a(fe=firmid) vce(clus bankid)

* Step 2: spread the stored effect over every row of the same firm-bank pair,
* then add up numberofloans over the rows where it is non-missing.
* NOTE(review): fe is presumably missing for observations that did not enter
* the estimation, which is what makes this a count of loans "used" -- confirm.
bysort firmid bankid (post): egen maxfe = max(fe)

gen nloans_maxfe = numberofloans if !missing(maxfe)
egen totalnloans_maxfe = total(nloans_maxfe)

* column 1: unweighted
* NOTE(review): the loan count shown after this column is the one derived from
* the weighted regression above -- confirm that both samples coincide.
reghdfe change_logloanamount std_groupexposure, a(firmid) vce(clus bankid)
summ totalnloans_maxfe

* column 2: weighted by pregq_totalassets
reghdfe change_logloanamount std_groupexposure [aweight = pregq_totalassets], a(firmid) vce(clus bankid)
summ totalnloans_maxfe

********************************************************************************
* Table 9: Mechanism: Crowding out demand for standalone firms' output
********************************************************************************
* Investment on postgq and postgq_highbgs within two subsamples of the
* baseline sample; standard errors clustered by cityid.
use "./data/baselinesample.dta", clear
 
keep if baselinesample == 1 & samplerestriction == 1

* column 1: manufacturing firms 
* (firm and state-by-year fixed effects only; no industry-by-year effects)
reghdfe investment postgq postgq_highbgs if manufacturing == 1, absorb(firmid stateid#fyear highbgs#stateid#fyear) vce (clus cityid)

* column 2: High-exporting industries
* (adds industry-by-year fixed effects, also interacted with highbgs)
reghdfe investment postgq postgq_highbgs if highexportingindustries == 1, absorb(firmid nic2digit#fyear highbgs#nic2digit#fyear stateid#fyear highbgs#stateid#fyear) vce (clus cityid)

********************************************************************************
* Table 10: Mechanism: Rival investment opportunities
********************************************************************************
* Investment on postgq and postgq_highbgs in the product-level sample, with
* standard errors clustered two-way by cityid and mainproduct_nic5.
use "./data/productlevelsample.dta", clear

keep if productlevelsample == 1

* column 1: all products, industry-by-year and state-by-year fixed effects
* each also interacted with highbgs
reghdfe investment postgq postgq_highbgs, absorb(firmid nic2digit#fyear highbgs#nic2digit#fyear stateid#fyear highbgs#stateid#fyear) vce (clus mainproduct_nic5 cityid)
	
* column 2: all products, without the highbgs-interacted fixed effects
reghdfe investment postgq postgq_highbgs , absorb(firmid  nic2digit#fyear  stateid#fyear) vce (clus cityid mainproduct_nic5)
	
* column 3: SA dominant products
reghdfe investment postgq postgq_highbgs if sadominantproducts == 1, absorb(firmid  nic2digit#fyear  stateid#fyear) vce (clus cityid mainproduct_nic5)












